In [1]:
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy as sp
import scipy.stats  # load the submodule explicitly so that `sp.stats` is always available
import seaborn as sns
sns.set_style("white")
In [2]:
import util2
# Load the three tables returned by util2.load_burritos(); `df` is the one
# analysed in the rest of this notebook.
df, dfRestaurants, dfIngredients = util2.load_burritos()
N = df.shape[0]  # number of rows in df

# Rows of df whose Location is 'lucha libre north park'. An explicit copy is
# taken because new columns are added to this subset further down; writing
# into a filtered view of df triggers SettingWithCopyWarning and is not
# guaranteed to stick.
dfLuchaLibre = df[df.Location == 'lucha libre north park'].copy()
In [3]:
# Number of each type of burrito.
# Maps the label shown on the chart to the regex fragment that burritotypes()
# looks for (case-insensitively) in each burrito name.
ll_types = {
    'Veggie': 'veg',
    'Surf & Turf': 'surf.*turf',
    'Holy Moly': 'moly',
    'Bean & cheese': 'bean.*cheese',
    'California': 'California - ',
    'Surfin California': 'surfin',
    'Nutty': 'nutty',
}
def burritotypes(x, types):
    """Assign each burrito name to a coarse type by regular-expression matching.

    Parameters
    ----------
    x : pandas.Series of str
        Burrito names. The Series' index labels are what ``imatches`` records.
    types : dict
        Maps a type label to a regular-expression fragment. Each fragment is
        wrapped in ``.*`` on both sides and matched case-insensitively.
        Names are tested against the types in the dict's iteration order and
        are assigned to the first type that matches, so overlapping fragments
        are resolved by that order.

    Returns
    -------
    Nmatches : dict
        Type label -> number of names assigned to it. Names that match no
        type are counted under the key ``'other'``. Labels that received no
        names are absent.
    imatches : dict
        Type label -> list of index labels of ``x`` assigned to that type.
        Unmatched names are not recorded here.
    burrito_new : list
        The assigned label (or ``'other'``) for every entry of ``x``, in order.
    """
    # Compile every pattern once, instead of once per (name, type) pair.
    compiled = [(label, re.compile('.*' + pattern + '.*', re.IGNORECASE))
                for label, pattern in types.items()]

    Nmatches = {}
    imatches = {}
    burrito_new = []
    for idx, name in zip(x.index, x):
        for candidate, regex in compiled:
            if regex.match(name) is not None:
                imatches.setdefault(candidate, []).append(idx)
                label = candidate
                break  # first matching type wins
        else:
            label = 'other'
        Nmatches[label] = Nmatches.get(label, 0) + 1
        burrito_new.append(label)
    return Nmatches, imatches, burrito_new
typecounts, imatches, burrito_new = burritotypes(dfLuchaLibre.Burrito, ll_types)

plt.figure(figsize=(6, 6))
ax = plt.axes([0.1, 0.1, 0.65, 0.65])

# Materialise labels and counts as lists: numpy cannot sum a dict view on
# Python 3, and both lists come from the same dict so they stay in step.
labels = list(typecounts.keys())
fracs = list(typecounts.values())
total = np.sum(fracs)
explode = [.1] * len(typecounts)

# The slices are ordered and plotted counter-clockwise, starting on the
# positive x-axis (startangle=0). autopct receives each slice's percentage,
# which is converted back to the raw count for display.
# `lambda p:` replaces the Python-2-only `lambda(p):` form.
patches, texts, autotexts = plt.pie(
    fracs, explode=explode, labels=labels,
    autopct=lambda p: '{:.0f}'.format(p * total / 100),
    shadow=False, startangle=0)

for t in texts:
    t.set_size(20)
for t in autotexts:
    t.set_size(20)

# White count text on the first and seventh slices (presumably the dark
# coloured ones - confirm against the rendered figure). Indices beyond the
# number of slices actually drawn are skipped instead of raising IndexError.
for i in (0, 6):
    if i < len(autotexts):
        autotexts[i].set_color('w')

figname = 'luchalibre_types'
#plt.savefig('C:/gh/srcole.github.io/assets/burrito/'+figname + '.png')
In [4]:
# Look at volume across burritos, but export to csv to import to R for better plotting
# Import only the names used here; a star import can silently shadow
# notebook-level names.
from ggplot import aes, element_text, geom_boxplot, ggplot, xlab, ylab

ll_types = {'Surf & Turf': 'surf.*turf', 'Holy Moly': 'moly', 'California': 'California - ',
            'Surfin California': 'surfin'}
typecounts, imatches, burrito_new = burritotypes(dfLuchaLibre.Burrito, ll_types)

# Create new column of simple burrito name. assign() returns a new frame, so
# nothing is written into what may be a filtered view of df.
dfLuchaLibre = dfLuchaLibre.assign(Burrito2=burrito_new)

(ggplot(aes(x='Burrito2', y='Volume'), data=dfLuchaLibre)
 + geom_boxplot()
 + ylab(element_text(size=25, text='Volume (L)'))
 + xlab(element_text(size=25, text='Burrito type')))
#dfLuchaLibre.to_csv('llburritoplt.csv')
Out[4]:
In [5]:
m_Location = ['Location','N','Hunger','Cost','Volume','Tortilla','Temp','Meat','Fillings','Meat:filling',
              'Uniformity','Salsa','Synergy','Wrap','overall']

# Calculate the mean of each of the metrics above for each taco shop
tacoshops = df.Location.unique()
TS = len(tacoshops)
dfmean = pd.DataFrame(np.nan, index=range(TS), columns=m_Location)
for ts, shop in enumerate(tacoshops):
    at_shop = df.loc[df.Location == shop]  # build the row mask once per shop
    # Row assignment aligns on column name: columns of dfmean that are absent
    # from the means stay NaN. numeric_only=True restricts the mean to
    # numeric columns rather than relying on pandas to skip text columns.
    dfmean.loc[ts] = at_shop.mean(numeric_only=True)
    # N is the number of rows for this shop. A single .loc call writes into
    # dfmean itself, unlike the chained form dfmean['N'][ts] = ...
    dfmean.loc[ts, 'N'] = len(at_shop)
dfmean['Location'] = tacoshops
In [6]:
# Display the per-shop table of metric means and row counts (N)
dfmean
Out[6]:
In [20]:
# Only shops with at least this many rows in df are kept for ranking
Ncutoff = 10
has_enough_rows = dfmean['N'] >= Ncutoff
dfToRank = dfmean.loc[has_enough_rows]
In [21]:
# Display the shops whose N is at least Ncutoff
dfToRank
Out[21]:
In [22]:
m_Rank = ['Location','Cost','Volume','Tortilla','Temp','Meat','Fillings','Meat:filling', 'Uniformity','Salsa','Synergy','Wrap','overall']
TS = len(dfToRank)
dfRanked = pd.DataFrame(np.nan, index=range(TS), columns=m_Rank)

# Assign whole columns on the frame itself; the chained form
# `dfRanked[m][:] = ...` writes through an intermediate Series and is not
# guaranteed to reach dfRanked. `.values` makes the copy explicitly
# positional: dfToRank keeps the row labels of the frame it was filtered
# from, which generally differ from dfRanked's 0..TS-1 labels.
dfRanked['Location'] = dfToRank['Location'].values
for m in m_Rank[1:]:
    # Cost is ranked ascending (lowest value gets rank 1); every other
    # metric is ranked descending (highest value gets rank 1).
    dfRanked[m] = dfToRank[m].rank(ascending=(m == 'Cost')).values
In [23]:
# Display the per-metric rank of every shop in dfToRank
dfRanked
Out[23]:
In [37]:
# Look up the 'overall' rank stored in row 0 of dfRanked
dfRanked.loc[0].overall
Out[37]:
In [44]:
# For every ranked metric, list the shop names from rank 1 downwards.
rank_strs = {}
Nrank = len(dfRanked)
for k in dfRanked.columns[1:]:  # every column except 'Location'
    # Order the shops by their rank with a stable sort rather than using the
    # rank as a list position. With positional placement, tied ranks (which
    # pandas reports as fractional averages) overwrite one slot and leave
    # another empty, and a NaN rank makes int() raise. Sorting lists every
    # shop exactly once; NaN ranks go last.
    rank_strs[k] = dfRanked.sort_values(k, kind='mergesort')['Location'].tolist()
In [55]:
# One column per metric holding the shop names; rows are labelled 1..Nrank
rank_positions = np.arange(1, 1 + Nrank)
dfRank2 = pd.DataFrame(rank_strs, index=rank_positions, columns=list(rank_strs.keys()))
In [56]:
# Display the table of shop names per metric, indexed 1..Nrank
dfRank2
Out[56]:
In [60]:
# Show a subset of the ranking columns in a chosen order.
# DataFrame.reindex_axis was deprecated and later removed from pandas;
# reindex(columns=...) is the equivalent call.
dfRank2.reindex(columns=['Salsa','Volume','Meat','overall','Cost','Temp'])
Out[60]:
In [26]:
# Flag the Lucha Libre burritos whose name matches '.*Cali.*' (case-insensitive)
import re
re4str = re.compile('.*Cali.*', re.IGNORECASE)
# NOTE: this rebinds N, which earlier held df.shape[0]
N = len(dfLuchaLibre)
iscali = np.array([re4str.match(b) is not None for b in dfLuchaLibre.Burrito], dtype=bool)
In [29]:
# Store the flag as a real column. `frame.name = values` on a name that is
# not already a column only sets a Python attribute on that one object; it
# is not part of the data and is lost when the frame is copied or filtered.
# assign() returns a new frame, so nothing is written into a view of df.
dfLuchaLibre = dfLuchaLibre.assign(iscali=iscali)
In [34]:
# Independent two-sample t-test on the `overall` column: burritos flagged
# iscali versus the remaining Lucha Libre burritos.
# `sp.stats` is only an attribute of scipy once the submodule has been
# imported, so this relies on the explicit `import scipy.stats` in the
# notebook's import cell rather than on another library loading it.
cali_mask = np.asarray(dfLuchaLibre.iscali, dtype=bool)
sp.stats.ttest_ind(dfLuchaLibre.loc[cali_mask, 'overall'],
                   dfLuchaLibre.loc[~cali_mask, 'overall'])
Out[34]:
In [ ]: